# Source: https://cdn.mos.cms.futurecdn.net/WxgePFJHTNAQRBuP3g5hoM-650-80.jpg.webp
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import random
import math
import time
#---------------------------------------
from sklearn.linear_model import LinearRegression, BayesianRidge
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error
#----------------------------------------------------------------------
import datetime
import operator
plt.style.use('fivethirtyeight')
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
latest_data.head()
| FIPS | Admin2 | Province_State | Country_Region | Last_Update | Lat | Long_ | Confirmed | Deaths | Recovered | Active | Combined_Key | Incidence_Rate | Case-Fatality_Ratio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | NaN | Afghanistan | 2020-10-21 04:24:14 | 33.93911 | 67.709953 | 40369 | 1501 | 33790 | 5068.0 | Afghanistan | 103.669971 | 3.714349 |
| 1 | NaN | NaN | NaN | Albania | 2020-10-21 04:24:14 | 41.15330 | 20.168300 | 17651 | 458 | 10225 | 6968.0 | Albania | 613.350476 | 2.594754 |
| 2 | NaN | NaN | NaN | Algeria | 2020-10-21 04:24:14 | 28.03390 | 1.659600 | 54829 | 1873 | 38346 | 14610.0 | Algeria | 125.034654 | 3.416075 |
| 3 | NaN | NaN | NaN | Andorra | 2020-10-21 04:24:14 | 42.50630 | 1.521800 | 3623 | 62 | 2273 | 1288.0 | Andorra | 4689.057141 | 1.711289 |
| 4 | NaN | NaN | NaN | Angola | 2020-10-21 04:24:14 | -11.20270 | 17.873900 | 8049 | 251 | 3037 | 4761.0 | Angola | 24.490155 | 3.118400 |
confirmed_df.head()
| Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 12/24/20 | 12/25/20 | 12/26/20 | 12/27/20 | 12/28/20 | 12/29/20 | 12/30/20 | 12/31/20 | 1/1/21 | 1/2/21 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 50655 | 50810 | 50886 | 51039 | 51280 | 51350 | 51405 | 51526 | 51526 | 51526 |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 54827 | 55380 | 55755 | 56254 | 56572 | 57146 | 57727 | 58316 | 58316 | 58991 |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 97007 | 97441 | 97857 | 98249 | 98631 | 98988 | 99311 | 99610 | 99897 | 100159 |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 7699 | 7756 | 7806 | 7821 | 7875 | 7919 | 7983 | 8049 | 8117 | 8166 |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 17029 | 17099 | 17149 | 17240 | 17296 | 17371 | 17433 | 17553 | 17568 | 17608 |
5 rows × 351 columns
us_medical_data.head()
| Province_State | Country_Region | Last_Update | Lat | Long_ | Confirmed | Deaths | Recovered | Active | FIPS | Incident_Rate | People_Tested | People_Hospitalized | Mortality_Rate | UID | ISO3 | Testing_Rate | Hospitalization_Rate | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Alabama | US | 2020-10-21 04:30:32 | 32.3182 | -86.9023 | 175210 | 2805 | 74238.0 | 97485.0 | 1.0 | 3559.482255 | 1265575.0 | NaN | 1.607192 | 84000001 | USA | 25811.283890 | NaN |
| 1 | Alaska | US | 2020-10-21 04:30:32 | 61.3707 | -152.4044 | 11391 | 67 | 6681.0 | 4643.0 | 2.0 | 1557.115420 | 546525.0 | NaN | 0.588184 | 84000002 | USA | 74708.322796 | NaN |
| 2 | American Samoa | US | 2020-10-21 04:30:32 | -14.2710 | -170.1320 | 0 | 0 | NaN | 0.0 | 60.0 | 0.000000 | 1616.0 | NaN | NaN | 16 | ASM | 2904.333136 | NaN |
| 3 | Arizona | US | 2020-10-21 04:30:32 | 33.7298 | -111.4312 | 232937 | 5837 | 38705.0 | 188395.0 | 4.0 | 3200.248066 | 1647345.0 | NaN | 2.505828 | 84000004 | USA | 22632.354026 | NaN |
| 4 | Arkansas | US | 2020-10-21 04:30:32 | 34.9697 | -92.3731 | 100441 | 1728 | 90283.0 | 8430.0 | 5.0 | 3328.281094 | 1231652.0 | NaN | 1.720413 | 84000005 | USA | 40812.855971 | NaN |
# Column labels of the confirmed-cases frame; the slice starting at cols[4]
# is used below to pick out the per-date columns.
cols = confirmed_df.columns
print(cols)
Index(['Province/State', 'Country/Region', 'Lat', 'Long', '1/22/20', '1/23/20',
'1/24/20', '1/25/20', '1/26/20', '1/27/20',
...
'12/24/20', '12/25/20', '12/26/20', '12/27/20', '12/28/20', '12/29/20',
'12/30/20', '12/31/20', '1/1/21', '1/2/21'],
dtype='object', length=351)
# Keep only the per-date columns (cols[4] through the last column) of each frame.
confirmed = confirmed_df.loc[:, cols[4]:cols[-1]]
deaths = deaths_df.loc[:, cols[4]:cols[-1]]
recoveries = recoveries_df.loc[:, cols[4]:cols[-1]]

dates = confirmed.keys()

# Worldwide per-date series, filled in date order by the loop below.
world_cases, total_deaths, total_recovered, total_active = [], [], [], []
mortality_rate, recovery_rate = [], []

for i in dates:
    # Sum every row of the three frames for this date.
    confirmed_sum, death_sum, recovered_sum = (
        frame[i].sum() for frame in (confirmed, deaths, recoveries)
    )

    # confirmed, deaths, recovered, and active
    world_cases.append(confirmed_sum)
    total_deaths.append(death_sum)
    total_recovered.append(recovered_sum)
    total_active.append(confirmed_sum - death_sum - recovered_sum)

    # rates relative to the confirmed total of the same date
    mortality_rate.append(death_sum / confirmed_sum)
    recovery_rate.append(recovered_sum / confirmed_sum)
def daily_increase(data):
    """Return the element-to-element change of a cumulative series.

    The first output element is ``data[0]`` itself (there is no earlier value
    to subtract); every later element is ``data[i] - data[i - 1]``.

    Args:
        data: Indexable, sliceable sequence of numbers (e.g. a list).

    Returns:
        A list with the same length as ``data``; empty when ``data`` is empty.
    """
    if len(data) == 0:
        return []
    # Pair each value with its predecessor instead of indexing by position.
    return [data[0]] + [curr - prev for prev, curr in zip(data, data[1:])]
def moving_average(data, window_size):
    """Return a forward-looking moving average of ``data``.

    Element ``i`` of the result is the mean of ``data[i:i + window_size]``.
    Near the end of the series the slice simply contains fewer values, so the
    window shrinks and the result keeps the same length as ``data``.

    Args:
        data: Sliceable sequence of numbers.
        window_size: Maximum number of consecutive values averaged together.

    Returns:
        A list of means, one per element of ``data``.
    """
    # Slicing past the end truncates on its own, so no separate tail branch
    # is needed.
    return [np.mean(data[i:i + window_size]) for i in range(len(data))]
# Number of consecutive days averaged together by moving_average.
window = 7

# Day-over-day changes of the cumulative worldwide totals.
world_daily_increase = daily_increase(world_cases)
world_daily_death = daily_increase(total_deaths)
world_daily_recovery = daily_increase(total_recovered)

# Moving averages of the cumulative totals (confirmed, deaths, recoveries, active).
world_confirmed_avg, world_death_avg, world_recovery_avg, world_active_avg = (
    moving_average(series, window)
    for series in (world_cases, total_deaths, total_recovered, total_active)
)

# Moving averages of the daily changes.
world_daily_increase_avg, world_daily_death_avg, world_daily_recovery_avg = (
    moving_average(series, window)
    for series in (world_daily_increase, world_daily_death, world_daily_recovery)
)
# Day indices (0 = first date column) and cumulative totals as (n, 1) column
# vectors, the 2-D layout the estimators below are fitted on.
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)
world_cases = np.array(world_cases).reshape(-1, 1)
total_deaths = np.array(total_deaths).reshape(-1, 1)
total_recovered = np.array(total_recovered).reshape(-1, 1)

# Forecast horizon: the observed days plus `days_in_future` extra days.
days_in_future = 30
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Strip exactly the forecast horizon (previously a hard-coded 30) so this
# stays correct when days_in_future is changed.
adjusted_dates = future_forcast[:-days_in_future]

# Calendar date string for every index in future_forcast.
start = '1/22/2020'
start_date = datetime.datetime.strptime(start, '%m/%d/%Y')
future_forcast_dates = [
    (start_date + datetime.timedelta(days=offset)).strftime('%m/%d/%Y')
    for offset in range(len(future_forcast))
]
# Drop the first 50 days so the models fit better (original note: regression
# models cannot pick up the pattern otherwise).  shuffle=False keeps the split
# in day order, so the test set is the last 20% of the remaining days.
X_train_confirmed, X_test_confirmed, y_train_confirmed, y_test_confirmed = train_test_split(
    days_since_1_22[50:], world_cases[50:], test_size=0.2, shuffle=False)

# Fixed hyperparameters; presumably taken from an earlier search
# (svm_search.best_estimator_) that is not shown here — confirm.
svm_confirmed = SVR(shrinking=True, kernel='poly', gamma=0.01, epsilon=1, degree=3, C=0.1)
# SVR expects a 1-D target; ravel() hands it one instead of an (n, 1) column.
svm_confirmed.fit(X_train_confirmed, y_train_confirmed.ravel())
svm_pred = svm_confirmed.predict(future_forcast)

# check against testing data
svm_test_pred = svm_confirmed.predict(X_test_confirmed)
plt.plot(y_test_confirmed)
plt.plot(svm_test_pred)
plt.legend(['Test Data', 'SVM Predictions'])
# scikit-learn metrics are declared as (y_true, y_pred).
print('MAE:', mean_absolute_error(y_test_confirmed, svm_test_pred))
print('MSE:', mean_squared_error(y_test_confirmed, svm_test_pred))
MAE: 1326548.419556894 MSE: 1965217939383.6392
# transform our data for polynomial regression
# Each transformer is fitted once, on the training inputs, and then only
# applied (transform) to the test and forecast inputs.
poly = PolynomialFeatures(degree=4)
poly_X_train_confirmed = poly.fit_transform(X_train_confirmed)
poly_X_test_confirmed = poly.transform(X_test_confirmed)
poly_future_forcast = poly.transform(future_forcast)

# Separate degree-5 expansion used by the Bayesian ridge model.
bayesian_poly = PolynomialFeatures(degree=5)
bayesian_poly_X_train_confirmed = bayesian_poly.fit_transform(X_train_confirmed)
bayesian_poly_X_test_confirmed = bayesian_poly.transform(X_test_confirmed)
bayesian_poly_future_forcast = bayesian_poly.transform(future_forcast)
# polynomial regression
# `normalize=True` was dropped: scikit-learn ignored it whenever
# fit_intercept=False, and recent releases no longer accept the argument,
# so the fitted model is unchanged.
linear_model = LinearRegression(fit_intercept=False)
linear_model.fit(poly_X_train_confirmed, y_train_confirmed)
test_linear_pred = linear_model.predict(poly_X_test_confirmed)
linear_pred = linear_model.predict(poly_future_forcast)
# scikit-learn metrics are declared as (y_true, y_pred).
print('MAE:', mean_absolute_error(y_test_confirmed, test_linear_pred))
print('MSE:', mean_squared_error(y_test_confirmed, test_linear_pred))
MAE: 7945015.295903881 MSE: 79185296204042.3
print(linear_model.coef_)
[[-1.38287451e+06 3.14448519e+04 -2.02859763e+02 3.88791621e+00 -5.32255097e-03]]
# Held-out totals against the polynomial-regression predictions.
plt.plot(y_test_confirmed, label='Test Data')
plt.plot(test_linear_pred, label='Polynomial Regression Predictions')
plt.legend()
<matplotlib.legend.Legend at 0x7fe6e68e3f10>
# bayesian ridge polynomial regression
# Candidate values sampled by the randomized search.
tol = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
alpha_1 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
alpha_2 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
lambda_1 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]
lambda_2 = [1e-7, 1e-6, 1e-5, 1e-4, 1e-3]

# 'normalize' is no longer searched: it had no effect with fit_intercept=False
# and recent scikit-learn releases reject it as an unknown parameter.
bayesian_grid = {
    'tol': tol,
    'alpha_1': alpha_1,
    'alpha_2': alpha_2,
    'lambda_1': lambda_1,
    'lambda_2': lambda_2,
}

bayesian = BayesianRidge(fit_intercept=False)
bayesian_search = RandomizedSearchCV(
    bayesian, bayesian_grid, scoring='neg_mean_squared_error', cv=3,
    return_train_score=True, n_jobs=-1, n_iter=40, verbose=1)
# BayesianRidge expects a 1-D target, so pass the raveled column.
bayesian_search.fit(bayesian_poly_X_train_confirmed, y_train_confirmed.ravel())
Fitting 3 folds for each of 40 candidates, totalling 120 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers. [Parallel(n_jobs=-1)]: Done 48 tasks | elapsed: 2.0s [Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed: 2.1s finished
RandomizedSearchCV(cv=3, estimator=BayesianRidge(fit_intercept=False),
n_iter=40, n_jobs=-1,
param_distributions={'alpha_1': [1e-07, 1e-06, 1e-05, 0.0001,
0.001],
'alpha_2': [1e-07, 1e-06, 1e-05, 0.0001,
0.001],
'lambda_1': [1e-07, 1e-06, 1e-05,
0.0001, 0.001],
'lambda_2': [1e-07, 1e-06, 1e-05,
0.0001, 0.001],
'normalize': [True, False],
'tol': [1e-06, 1e-05, 0.0001, 0.001,
0.01]},
return_train_score=True, scoring='neg_mean_squared_error',
verbose=1)
bayesian_search.best_params_
{'tol': 0.001,
'normalize': False,
'lambda_2': 1e-06,
'lambda_1': 0.001,
'alpha_2': 0.001,
'alpha_1': 1e-07}
# Best estimator found by the search (RandomizedSearchCV refits it on the
# data passed to fit, by default).
bayesian_confirmed = bayesian_search.best_estimator_
test_bayesian_pred = bayesian_confirmed.predict(bayesian_poly_X_test_confirmed)
bayesian_pred = bayesian_confirmed.predict(bayesian_poly_future_forcast)
# scikit-learn metrics are declared as (y_true, y_pred).
print('MAE:', mean_absolute_error(y_test_confirmed, test_bayesian_pred))
print('MSE:', mean_squared_error(y_test_confirmed, test_bayesian_pred))
MAE: 7142471.68637624 MSE: 63025963254224.75
# Held-out totals against the Bayesian-ridge predictions.
plt.plot(y_test_confirmed, label='Test Data')
plt.plot(test_bayesian_pred, label='Bayesian Ridge Polynomial Predictions')
plt.legend()
<matplotlib.legend.Legend at 0x7fe6e6b11ad0>
# helper method for flattening the data, so it can be displayed on a bar graph
def flatten(arr):
    """Return the first element of every row of ``arr`` as a plain list.

    Args:
        arr: 2-D array-like (``numpy.ndarray`` or nested sequence), for
            example an ``(n, 1)`` column vector.

    Returns:
        A Python list holding ``row[0]`` for each row, in order.
    """
    # np.asarray lets nested lists through as well as ndarrays.
    return [row[0] for row in np.asarray(arr).tolist()]
# Collapse the day-index array into a flat 1-D x-axis.
adjusted_dates = adjusted_dates.ravel()


def _plot_world_total(series, series_avg, title, y_label, series_label):
    """Line chart of one worldwide series with its dashed moving average."""
    plt.figure(figsize=(16, 10))
    plt.plot(adjusted_dates, series)
    plt.plot(adjusted_dates, series_avg, linestyle='dashed', color='orange')
    plt.title(title, size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel(y_label, size=30)
    plt.legend([series_label, 'Moving Average {} Days'.format(window)], prop={'size': 20})
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()


_plot_world_total(world_cases, world_confirmed_avg,
                  '# of Coronavirus Cases Over Time', '# of Cases',
                  'Worldwide Coronavirus Cases')
_plot_world_total(total_deaths, world_death_avg,
                  '# of Coronavirus Deaths Over Time', '# of Cases',
                  'Worldwide Coronavirus Deaths')
_plot_world_total(total_recovered, world_recovery_avg,
                  '# of Coronavirus Recoveries Over Time', '# of Cases',
                  'Worldwide Coronavirus Recoveries')
_plot_world_total(total_active, world_active_avg,
                  '# of Coronavirus Active Cases Over Time', '# of Active Cases',
                  'Worldwide Coronavirus Active Cases')
def _plot_world_daily(daily, daily_avg, title, bar_label):
    """Bar chart of a worldwide daily-change series plus its dashed moving average."""
    plt.figure(figsize=(16, 10))
    plt.bar(adjusted_dates, daily)
    plt.plot(adjusted_dates, daily_avg, color='orange', linestyle='dashed')
    plt.title(title, size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    # Label order is kept exactly as in the original figures.
    plt.legend(['Moving Average {} Days'.format(window), bar_label], prop={'size': 20})
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()


_plot_world_daily(world_daily_increase, world_daily_increase_avg,
                  'World Daily Increases in Confirmed Cases',
                  'World Daily Increase in COVID-19 Cases')
_plot_world_daily(world_daily_death, world_daily_death_avg,
                  'World Daily Increases in Confirmed Deaths',
                  'World Daily Increase in COVID-19 Deaths')
_plot_world_daily(world_daily_recovery, world_daily_recovery_avg,
                  'World Daily Increases in Confirmed Recoveries',
                  'World Daily Increase in COVID-19 Recoveries')
def _plot_world_log(series, title):
    """Line chart of the base-10 logarithm of one worldwide cumulative series."""
    plt.figure(figsize=(16, 10))
    plt.plot(adjusted_dates, np.log10(series))
    plt.title(title, size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()


_plot_world_log(world_cases, 'Log of # of Coronavirus Cases Over Time')
_plot_world_log(total_deaths, 'Log of # of Coronavirus Deaths Over Time')
_plot_world_log(total_recovered, 'Log of # of Coronavirus Recoveries Over Time')
def _country_figure(x, y, y_avg, legend_labels, title, as_bars):
    """Draw one country figure: the raw series plus its dashed red moving average."""
    plt.figure(figsize=(16, 10))
    if as_bars:
        plt.bar(x, y)
    else:
        plt.plot(x, y)
    plt.plot(x, y_avg, color='red', linestyle='dashed')
    plt.legend(legend_labels, prop={'size': 20})
    plt.title(title, size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()


def country_plot(x, y1, y2, y3, y4, country):
    """Show four figures for one country.

    Args:
        x: x-axis values shared by all four figures.
        y1: Cumulative confirmed cases (drawn as a line).
        y2: Daily increase in confirmed cases (drawn as bars).
        y3: Daily increase in deaths (drawn as bars).
        y4: Daily increase in recoveries (drawn as bars).
        country: Name inserted into the titles and legends.

    Every series is overlaid with its moving average over the module-level
    ``window`` (set near the top of the notebook).
    """
    avg_label = 'Moving Average {} Days'.format(window)

    _country_figure(
        x, y1, moving_average(y1, window),
        ['{} Confirmed Cases'.format(country), avg_label],
        '{} Confirmed Cases'.format(country), as_bars=False)
    # The bar figures list the moving-average label first, as before.
    _country_figure(
        x, y2, moving_average(y2, window),
        [avg_label, '{} Daily Increase in Confirmed Cases'.format(country)],
        '{} Daily Increases in Confirmed Cases'.format(country), as_bars=True)
    _country_figure(
        x, y3, moving_average(y3, window),
        [avg_label, '{} Daily Increase in Confirmed Deaths'.format(country)],
        '{} Daily Increases in Deaths'.format(country), as_bars=True)
    _country_figure(
        x, y4, moving_average(y4, window),
        [avg_label, '{} Daily Increase in Confirmed Recoveries'.format(country)],
        '{} Daily Increases in Recoveries'.format(country), as_bars=True)
# helper function for getting country's cases, deaths, and recoveries
def get_country_info(country_name):
    """Return per-date totals for one country.

    Args:
        country_name: Value matched against the 'Country/Region' column.

    Returns:
        A tuple ``(cases, deaths, recoveries)`` of lists, each holding one
        total per entry of the module-level ``dates``, summed over all rows
        of the country. A name that matches no row yields all-zero lists.
    """
    def _totals_by_date(frame):
        # Select the country's rows once, then sum every date column in a
        # single pass instead of re-filtering the frame for each date.
        rows = frame[frame['Country/Region'] == country_name]
        return rows[dates].sum().tolist()

    return (
        _totals_by_date(confirmed_df),
        _totals_by_date(deaths_df),
        _totals_by_date(recoveries_df),
    )
def country_visualizations(country_name):
    """Fetch one country's series and show its four figures via country_plot."""
    cases, death_totals, recovery_totals = get_country_info(country_name)
    country_plot(
        adjusted_dates,
        cases,
        daily_increase(cases),
        daily_increase(death_totals),
        daily_increase(recovery_totals),
        country_name,
    )
# A missing comma used to fuse 'Australia' and 'Italy' into the non-existent
# country 'AustraliaItaly' (which plots as all zeros), and Australia was
# listed twice.
countries = ['Canada', 'Switzerland', 'Australia', 'US', 'Brazil', 'Italy']

for country in countries:
    country_visualizations(country)
# Country Comparison
compare_countries = ['Switzerland', 'Canada', 'US', 'Brazil', 'Australia']
graph_name = ['Coronavirus Confirmed Cases', 'Coronavirus Confirmed Deaths', 'Coronavirus Confirmed Recoveries']

# Fetch every country's (cases, deaths, recoveries) once and reuse it for
# all three figures instead of recomputing it per figure.
_compare_info = [get_country_info(country) for country in compare_countries]

for num in range(3):
    plt.figure(figsize=(16, 10))
    for country_series in _compare_info:
        plt.plot(country_series[num])
    plt.legend(compare_countries, prop={'size': 20})
    # The series cover every date column, so the axis counts days from the
    # first one (1/22/2020), matching the other figures.
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.title(graph_name[num], size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()
def plot_predictions(x, y, pred, algo_name, color):
    """Plot observed worldwide cases against one model's forecast.

    ``x``/``y`` are drawn as a solid line; ``pred`` is drawn dashed in
    ``color`` over the module-level ``future_forcast`` axis and labelled
    ``algo_name`` in the legend.
    """
    plt.figure(figsize=(16, 10))

    # Observed data first, forecast second, so the legend labels line up.
    plt.plot(x, y)
    plt.plot(future_forcast, pred, color=color, linestyle='dashed')
    plt.legend(['Confirmed Cases', algo_name], prop={'size': 20})

    plt.title('Worldwide Coronavirus Cases Over Time', size=30)
    plt.xlabel('Days Since 1/22/2020', size=30)
    plt.ylabel('# of Cases', size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()
# One forecast figure per model, in the same order as before.
for _pred, _label, _color in (
    (svm_pred, 'SVM Predictions', 'purple'),
    (linear_pred, 'Polynomial Regression Predictions', 'orange'),
    (bayesian_pred, 'Bayesian Ridge Regression Predictions', 'green'),
):
    plot_predictions(adjusted_dates, world_cases, _pred, _label, _color)
# Future predictions using SVM: the last 10 forecast dates with rounded values.
svm_df = pd.DataFrame({
    'Date': future_forcast_dates[-10:],
    'SVM Predicted # of Confirmed Cases Worldwide': np.round(svm_pred[-10:]),
})
svm_df.style.background_gradient(cmap='Reds')
| Date | SVM Predicted # of Confirmed Cases Worldwide | |
|---|---|---|
| 0 | 01/23/2021 | 103356037.000000 |
| 1 | 01/24/2021 | 104189868.000000 |
| 2 | 01/25/2021 | 105028244.000000 |
| 3 | 01/26/2021 | 105871175.000000 |
| 4 | 01/27/2021 | 106718675.000000 |
| 5 | 01/28/2021 | 107570757.000000 |
| 6 | 01/29/2021 | 108427432.000000 |
| 7 | 01/30/2021 | 109288712.000000 |
| 8 | 01/31/2021 | 110154611.000000 |
| 9 | 02/01/2021 | 111025140.000000 |
# Future predictions using polynomial regression: flatten the predictions,
# then tabulate the last 10 forecast dates with rounded values.
linear_pred = linear_pred.ravel()
linear_df = pd.DataFrame({
    'Date': future_forcast_dates[-10:],
    'Polynomial Predicted # of Confirmed Cases Worldwide': np.round(linear_pred[-10:]),
})
linear_df.style.background_gradient(cmap='Reds')
| Date | Polynomial Predicted # of Confirmed Cases Worldwide | |
|---|---|---|
| 0 | 01/23/2021 | 78460389.000000 |
| 1 | 01/24/2021 | 78861293.000000 |
| 2 | 01/25/2021 | 79261726.000000 |
| 3 | 01/26/2021 | 79661664.000000 |
| 4 | 01/27/2021 | 80061084.000000 |
| 5 | 01/28/2021 | 80459962.000000 |
| 6 | 01/29/2021 | 80858273.000000 |
| 7 | 01/30/2021 | 81255993.000000 |
| 8 | 01/31/2021 | 81653097.000000 |
| 9 | 02/01/2021 | 82049563.000000 |
# Future predictions using Bayesian Ridge: the last 10 forecast dates with
# rounded values.
bayesian_df = pd.DataFrame({
    'Date': future_forcast_dates[-10:],
    'Bayesian Ridge Predicted # of Confirmed Cases Worldwide': np.round(bayesian_pred[-10:]),
})
bayesian_df.style.background_gradient(cmap='Reds')
| Date | Bayesian Ridge Predicted # of Confirmed Cases Worldwide | |
|---|---|---|
| 0 | 01/23/2021 | 82173009.000000 |
| 1 | 01/24/2021 | 82674851.000000 |
| 2 | 01/25/2021 | 83178229.000000 |
| 3 | 01/26/2021 | 83683146.000000 |
| 4 | 01/27/2021 | 84189606.000000 |
| 5 | 01/28/2021 | 84697611.000000 |
| 6 | 01/29/2021 | 85207167.000000 |
| 7 | 01/30/2021 | 85718277.000000 |
| 8 | 01/31/2021 | 86230946.000000 |
| 9 | 02/01/2021 | 86745176.000000 |
def _label_and_show(title, x_label, y_label):
    """Apply title, axis labels and tick sizes to the current figure, then show it."""
    plt.title(title, size=30)
    plt.xlabel(x_label, size=30)
    plt.ylabel(y_label, size=30)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()


# Mortality rate per day, with its mean drawn as a dashed horizontal line.
mean_mortality_rate = np.mean(mortality_rate)
plt.figure(figsize=(16, 10))
plt.plot(adjusted_dates, mortality_rate, color='orange')
plt.axhline(y=mean_mortality_rate, linestyle='--', color='black')
_label_and_show('Worldwide Mortality Rate of Coronavirus Over Time',
                'Days Since 1/22/2020', 'Case Mortality Rate')

# Recovery rate per day (the mean is computed but not drawn).
mean_recovery_rate = np.mean(recovery_rate)
plt.figure(figsize=(16, 10))
plt.plot(adjusted_dates, recovery_rate, color='blue')
_label_and_show('Worldwide Recovery Rate of Coronavirus Over Time',
                'Days Since 1/22/2020', 'Case Recovery Rate')

# Cumulative deaths and recoveries on one time axis.
plt.figure(figsize=(16, 10))
plt.plot(adjusted_dates, total_deaths, color='r')
plt.plot(adjusted_dates, total_recovered, color='green')
plt.legend(['death', 'recoveries'], loc='best', fontsize=25)
_label_and_show('Worldwide Coronavirus Cases', 'Days Since 1/22/2020', '# of Cases')

# Cumulative deaths plotted against cumulative recoveries.
plt.figure(figsize=(16, 10))
plt.plot(total_recovered, total_deaths)
_label_and_show('# of Coronavirus Deaths vs. # of Coronavirus Recoveries',
                '# of Coronavirus Recoveries', '# of Coronavirus Deaths')
# Per-country totals from the latest report, computed in one grouped pass
# instead of re-filtering the whole frame for every country and column.
_country_totals = (
    latest_data
    .groupby('Country_Region', sort=False)[
        ['Confirmed', 'Deaths', 'Recovered', 'Active', 'Incidence_Rate']]
    .sum()
)

# Countries without any confirmed case are set aside and excluded below.
_has_cases = _country_totals['Confirmed'] > 0
no_cases = _country_totals.index[~_has_cases].tolist()

# sort countries by the number of confirmed cases (largest first)
_country_totals = _country_totals[_has_cases].sort_values(
    'Confirmed', ascending=False, kind='mergesort')

unique_countries = _country_totals.index.tolist()
country_confirmed_cases = _country_totals['Confirmed'].tolist()
country_death_cases = _country_totals['Deaths'].tolist()
country_recovery_cases = _country_totals['Recovered'].tolist()
country_active_cases = _country_totals['Active'].tolist()
# NOTE(review): Incidence_Rate is summed over a country's rows, as before;
# a sum of per-region rates is probably not a meaningful country-level
# rate — confirm the intended aggregation.
country_incidence_rate = _country_totals['Incidence_Rate'].tolist()
country_mortality_rate = (
    _country_totals['Deaths'] / _country_totals['Confirmed']).tolist()
# Confirmed cases in the US compared with every other country combined.
total_world_cases = np.sum(country_confirmed_cases)
us_confirmed = latest_data[latest_data['Country_Region'] == 'US']['Confirmed'].sum()
outside_us_confirmed = total_world_cases - us_confirmed

# Absolute counts.
plt.figure(figsize=(16, 9))
plt.barh('United States', us_confirmed)
plt.barh('Outside United States', outside_us_confirmed)
plt.title('# of Total Coronavirus Confirmed Cases', size=20)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

# Shares of the worldwide total. The title promises percentages, so the
# fractions are scaled by 100 (they were previously plotted on a 0-1 scale).
plt.figure(figsize=(16, 9))
plt.barh('United States', 100 * us_confirmed / total_world_cases)
plt.barh('Outside United States', 100 * outside_us_confirmed / total_world_cases)
plt.title('# of Coronavirus Confirmed Cases Expressed in Percentage', size=20)
plt.xticks(size=20)
plt.yticks(size=20)
plt.show()

print('Outside United States {} cases:'.format(outside_us_confirmed))
print('United States {} cases'.format(us_confirmed))
print('Total: {} cases'.format(us_confirmed+outside_us_confirmed))
Outside United States 32548141 cases: United States 8302170 cases Total: 40850311 cases
# Only show the 10 countries with the most confirmed cases; the rest are
# grouped into the 'Others' category.  (Both lists are already ordered by
# confirmed cases, largest first.)
others = np.sum(country_confirmed_cases[10:])
visual_unique_countries = list(unique_countries[:10]) + ['Others']
visual_confirmed_cases = list(country_confirmed_cases[:10]) + [others]
def plot_bar_graphs(x, y, title):
    """Show a 16x12 horizontal bar chart of ``y`` against the labels ``x``."""
    plt.figure(figsize=(16, 12))
    plt.barh(x, y)
    # Title and both tick-label sets share the same font size.
    plt.title(title, size=20)
    plt.xticks(size=20)
    plt.yticks(size=20)
    plt.show()
# Taller variant for charts with many labels.
def plot_bar_graphs_tall(x, y, title):
    """Show a 19x18 horizontal bar chart of ``y`` against the labels ``x``."""
    plt.figure(figsize=(19, 18))
    plt.barh(x, y)
    # Title and both tick-label sets share the same font size.
    plt.title(title, size=25)
    plt.xticks(size=25)
    plt.yticks(size=25)
    plt.show()
# Confirmed cases for the listed countries, on a linear scale ...
plot_bar_graphs(
    visual_unique_countries,
    visual_confirmed_cases,
    '# of Covid-19 Confirmed Cases in Countries/Regions',
)

# ... and on a base-10 logarithmic scale.
log_country_confirmed_cases = list(map(math.log10, visual_confirmed_cases))
plot_bar_graphs(
    visual_unique_countries,
    log_country_confirmed_cases,
    'Common Log # of Coronavirus Confirmed Cases in Countries/Regions',
)

# Distinct province/state names found in the latest report.
unique_provinces = list(latest_data['Province_State'].unique())
# Get the latest information about provinces/states that have confirmed coronavirus cases
# Per-province totals from the latest report.  Rows are grouped by country AND
# province: matching on the province name alone merged same-named provinces
# of different countries into one row credited to whichever country appeared
# first.  Rows without a province name drop out of the grouping, which
# matches the earlier behavior of never matching them.
_province_totals = (
    latest_data
    .groupby(['Country_Region', 'Province_State'], sort=False)[
        ['Confirmed', 'Deaths', 'Active', 'Incidence_Rate']]
    .sum()
)

# remove areas with no confirmed cases
_province_has_cases = _province_totals['Confirmed'] > 0
no_cases = (
    _province_totals.index[~_province_has_cases]
    .get_level_values('Province_State')
    .tolist()
)

# Largest number of confirmed cases first.
_province_totals = _province_totals[_province_has_cases].sort_values(
    'Confirmed', ascending=False, kind='mergesort')

# Parallel lists, one entry per (country, province) pair; a province name can
# therefore occur more than once in unique_provinces.
unique_provinces = _province_totals.index.get_level_values('Province_State').tolist()
province_country = _province_totals.index.get_level_values('Country_Region').tolist()
province_confirmed_cases = _province_totals['Confirmed'].tolist()
province_death_cases = _province_totals['Deaths'].tolist()
# Recovered totals are not collected for provinces (that code was already
# commented out).
province_active = _province_totals['Active'].tolist()
# NOTE(review): Incidence_Rate is summed over a province's rows, as before;
# a sum of per-area rates is probably not a meaningful province-level
# rate — confirm the intended aggregation.
province_incidence_rate = _province_totals['Incidence_Rate'].tolist()
province_mortality_rate = (
    _province_totals['Deaths'] / _province_totals['Confirmed']).tolist()
# Table of the top `province_limit` provinces/states by confirmed cases.
province_limit = 100
_province_columns = {
    'Province/State Name': unique_provinces,
    'Country': province_country,
    'Number of Confirmed Cases': province_confirmed_cases,
    'Number of Deaths': province_death_cases,
    'Number of Active Cases': province_active,
    'Incidence Rate': province_incidence_rate,
    'Mortality Rate': province_mortality_rate,
}
# Every column is cut to the same leading slice so the rows stay aligned.
province_df = pd.DataFrame(
    {name: values[:province_limit] for name, values in _province_columns.items()})
province_df.style.background_gradient(cmap='Oranges')
| Province/State Name | Country | Number of Confirmed Cases | Number of Deaths | Number of Active Cases | Incidence Rate | Mortality Rate | |
|---|---|---|---|---|---|---|---|
| 0 | Maharashtra | India | 1609516 | 42453 | 174755.000000 | 1307.017058 | 0.026376 |
| 1 | Sao Paulo | Brazil | 1068962 | 38246 | 122517.000000 | 2327.927131 | 0.035779 |
| 2 | California | US | 883746 | 17055 | 866691.000000 | 101792.633443 | 0.019299 |
| 3 | Texas | US | 870347 | 17629 | 844719.000000 | 642622.496614 | 0.020255 |
| 4 | Andhra Pradesh | India | 789553 | 6481 | 33396.000000 | 1464.755660 | 0.008208 |
| 5 | Karnataka | India | 776901 | 10608 | 103964.000000 | 1149.896557 | 0.013654 |
| 6 | Florida | US | 760389 | 16105 | 744284.000000 | 266979.355801 | 0.021180 |
| 7 | Tamil Nadu | India | 694030 | 10741 | 36734.000000 | 891.596484 | 0.015476 |
| 8 | England | United Kingdom | 647025 | 38996 | 608029.000000 | 1155.872391 | 0.060270 |
| 9 | New York | US | 490166 | 33582 | 453114.000000 | 75039.637823 | 0.068511 |
| 10 | Uttar Pradesh | India | 459154 | 6714 | 30416.000000 | 193.016958 | 0.014623 |
| 11 | Lima | Peru | 394933 | 15212 | 379721.000000 | 3715.792445 | 0.038518 |
| 12 | Moscow | Russia | 372628 | 6058 | 88962.000000 | 2979.482297 | 0.016258 |
| 13 | Illinois | US | 354457 | 9537 | 344920.000000 | 225801.931860 | 0.026906 |
| 14 | Kerala | India | 353472 | 1206 | 92023.000000 | 990.133095 | 0.003412 |
| 15 | Georgia | US | 342438 | 7674 | 334764.000000 | 548043.120834 | 0.022410 |
| 16 | Minas Gerais | Brazil | 338107 | 8483 | 43344.000000 | 1597.195607 | 0.025090 |
| 17 | Bahia | Brazil | 337994 | 7363 | 19577.000000 | 2272.524343 | 0.021784 |
| 18 | Delhi | India | 336750 | 6081 | 23922.000000 | 1799.750969 | 0.018058 |
| 19 | West Bengal | India | 329057 | 6180 | 35170.000000 | 330.347658 | 0.018781 |
| 20 | Capital District | Colombia | 299713 | 7345 | 23213.000000 | 4043.309699 | 0.024507 |
| 21 | Metropolitana | Chile | 293436 | 9650 | 3191.000000 | 4125.459312 | 0.032886 |
| 22 | Rio de Janeiro | Brazil | 292621 | 19836 | 13857.000000 | 1694.885410 | 0.067787 |
| 23 | Madrid | Spain | 283130 | 10059 | 232335.000000 | 4262.947349 | 0.035528 |
| 24 | Odisha | India | 272250 | 1168 | 18885.000000 | 587.298383 | 0.004290 |
| 25 | Ceara | Brazil | 266289 | 9218 | 33790.000000 | 2915.973779 | 0.034617 |
| 26 | North Carolina | US | 248750 | 3992 | 244758.000000 | 243104.981892 | 0.016048 |
| 27 | Para | Brazil | 242129 | 6686 | 13446.000000 | 2814.515862 | 0.027613 |
| 28 | Goias | Brazil | 240165 | 5444 | 16544.000000 | 3421.956202 | 0.022668 |
| 29 | Santa Catarina | Brazil | 237781 | 2991 | 21295.000000 | 3318.744393 | 0.012579 |
| 30 | Tennessee | US | 233569 | 2952 | 230617.000000 | 352427.400653 | 0.012639 |
| 31 | Arizona | US | 232937 | 5837 | 227101.000000 | 49699.421894 | 0.025058 |
| 32 | Punjab | India | 230526 | 6356 | 7241.000000 | 519.281512 | 0.027572 |
| 33 | Telangana | India | 226124 | 1287 | 20449.000000 | 574.462159 | 0.005692 |
| 34 | Rio Grande do Sul | Brazil | 225694 | 5452 | 21554.000000 | 1983.732609 | 0.024157 |
| 35 | New Jersey | US | 222193 | 16227 | 205966.000000 | 46531.730006 | 0.073031 |
| 36 | Bihar | India | 207405 | 1011 | 11348.000000 | 166.190002 | 0.004875 |
| 37 | Distrito Federal | Brazil | 206265 | 3575 | 12883.000000 | 6840.685471 | 0.017332 |
| 38 | Assam | India | 202073 | 884 | 26775.000000 | 567.508576 | 0.004375 |
| 39 | Parana | Brazil | 201877 | 4999 | 56778.000000 | 1765.591737 | 0.024763 |
| 40 | Pennsylvania | US | 189970 | 8516 | 181454.000000 | 67604.603834 | 0.044828 |
| 41 | Wisconsin | US | 187828 | 1633 | 176849.000000 | 195583.720282 | 0.008694 |
| 42 | Ohio | US | 185639 | 5083 | 180556.000000 | 120884.690444 | 0.027381 |
| 43 | Maranhao | Brazil | 182020 | 3949 | 10945.000000 | 2572.655032 | 0.021695 |
| 44 | Catalonia | Spain | 179552 | 5919 | 147430.000000 | 2373.007829 | 0.032965 |
| 45 | Rajasthan | India | 177123 | 1774 | 20254.000000 | 218.582158 | 0.010016 |
| 46 | Louisiana | US | 176681 | 5772 | 170909.000000 | 266278.244651 | 0.032669 |
| 47 | Alabama | US | 175210 | 2805 | 171723.000000 | 261349.190923 | 0.016009 |
| 48 | Amazonas | Brazil | 171504 | 4701 | 44149.000000 | 11059.067568 | 0.027410 |
| 49 | Virginia | US | 167754 | 3485 | 163788.000000 | 261165.378883 | 0.020774 |
| 50 | Michigan | US | 165994 | 7383 | 158611.000000 | 92792.378118 | 0.044478 |
| 51 | South Carolina | US | 165493 | 3696 | 161797.000000 | 156224.781893 | 0.022333 |
| 52 | Chhattisgarh | India | 165279 | 1584 | 25709.000000 | 561.481529 | 0.009584 |
| 53 | Madhya Pradesh | India | 162178 | 2811 | 12507.000000 | 189.995275 | 0.017333 |
| 54 | Missouri | US | 161849 | 2625 | 159224.000000 | 271395.557107 | 0.016219 |
| 55 | Gujarat | India | 161686 | 3651 | 14245.000000 | 253.139075 | 0.022581 |
| 56 | Pernambuco | Brazil | 156794 | 8505 | 17154.000000 | 1640.607253 | 0.054243 |
| 57 | Haryana | India | 152174 | 1660 | 10078.000000 | 539.534344 | 0.010909 |
| 58 | Indiana | US | 150664 | 4008 | 146656.000000 | 176203.364962 | 0.026602 |
| 59 | Ciudad de Mexico | Mexico | 149109 | 14480 | 9159.000000 | 1653.341494 | 0.097110 |
| 60 | Espirito Santo | Brazil | 146233 | 3740 | 14658.000000 | 3638.858821 | 0.025576 |
| 61 | Antioquia | Colombia | 146086 | 2878 | 9223.000000 | 2280.063592 | 0.019701 |
| 62 | Massachusetts | US | 145464 | 9758 | 135706.000000 | 21221.281063 | 0.067082 |
| 63 | Sindh | Pakistan | 142348 | 2587 | 4267.000000 | 297.264020 | 0.018174 |
| 64 | Maryland | US | 136744 | 4050 | 132694.000000 | 42582.380890 | 0.029617 |
| 65 | Mato Grosso | Brazil | 136579 | 3690 | 20671.000000 | 3919.653686 | 0.027017 |
| 66 | Lombardia | Italy | 130479 | 17103 | 26304.000000 | 1296.933952 | 0.131079 |
| 67 | Paraiba | Brazil | 129045 | 3010 | 24962.000000 | 3211.570963 | 0.023325 |
| 68 | Minnesota | US | 125531 | 2299 | 123232.000000 | 177406.003989 | 0.018314 |
| 69 | Mississippi | US | 111322 | 3202 | 108120.000000 | 349563.593841 | 0.028763 |
| 70 | Oklahoma | US | 109548 | 1191 | 108357.000000 | 197035.225800 | 0.010872 |
| 71 | Iowa | US | 109202 | 1576 | 107626.000000 | 316262.046215 | 0.014432 |
| 72 | Piaui | Brazil | 106908 | 2304 | 5349.000000 | 3266.134613 | 0.021551 |
| 73 | Washington | US | 101864 | 2237 | 96868.000000 | 53489.461882 | 0.021961 |
| 74 | Arkansas | US | 100441 | 1728 | 98713.000000 | 256353.408944 | 0.017204 |
| 75 | Jharkhand | India | 97414 | 849 | 6180.000000 | 252.407450 | 0.008715 |
| 76 | Utah | US | 96643 | 551 | 96092.000000 | 31900.032580 | 0.005701 |
| 77 | Andalusia | Spain | 96224 | 2176 | 83377.000000 | 1141.798691 | 0.022614 |
| 78 | Quebec | Canada | 95216 | 6055 | 8693.000000 | 1115.245206 | 0.063592 |
| 79 | Nordrhein-Westfalen | Germany | 94883 | 1970 | 20106.000000 | 529.107492 | 0.020762 |
| 80 | Mexico | Mexico | 92547 | 10285 | 2889.000000 | 531.031186 | 0.111133 |
| 81 | Nevada | US | 91499 | 1727 | 89772.000000 | 19124.917987 | 0.018875 |
| 82 | Alagoas | Brazil | 89644 | 2187 | 1900.000000 | 2686.077636 | 0.024397 |
| 83 | Kentucky | US | 89544 | 1342 | 88202.000000 | 201824.614662 | 0.014987 |
| 84 | Jammu and Kashmir | India | 88958 | 1397 | 8124.000000 | 653.799117 | 0.015704 |
| 85 | Colorado | US | 87582 | 2182 | 85377.000000 | 73942.378940 | 0.024914 |
| 86 | Sergipe | Brazil | 82301 | 2145 | 7126.000000 | 3580.334242 | 0.026063 |
| 87 | Bayern | Germany | 82102 | 2726 | 11535.000000 | 627.848526 | 0.033203 |
| 88 | Moscow Oblast | Russia | 81377 | 1490 | 19135.000000 | 1084.537179 | 0.018310 |
| 89 | Rio Grande do Norte | Brazil | 78704 | 2553 | 34111.000000 | 2244.291392 | 0.032438 |
| 90 | Mato Grosso do Sul | Brazil | 77943 | 1512 | 7802.000000 | 2804.728055 | 0.019399 |
| 91 | Kansas | US | 76048 | 895 | 71284.000000 | 249620.681021 | 0.011769 |
| 92 | Valle del Cauca | Colombia | 75172 | 2603 | 5620.000000 | 1679.488709 | 0.034627 |
| 93 | Tocantins | Brazil | 73065 | 1055 | 16311.000000 | 4645.341688 | 0.014439 |
| 94 | Zuid-Holland | Netherlands | 71770 | 1536 | 70234.000000 | 1935.181530 | 0.021402 |
| 95 | Atlantico | Colombia | 69991 | 3110 | 1233.000000 | 2760.423219 | 0.044434 |
| 96 | Rondonia | Brazil | 69332 | 1429 | 7714.000000 | 3901.138010 | 0.020611 |
| 97 | Ontario | Canada | 68556 | 3107 | 6114.000000 | 465.992429 | 0.045321 |
| 98 | Hubei | China | 68139 | 4512 | 0.000000 | 115.158019 | 0.066218 |
| 99 | Castilla y Leon | Spain | 65158 | 3380 | 53062.000000 | 2706.197074 | 0.051874 |
# Keep the 10 provinces with the most confirmed cases and lump everything after them into a single 'Others' entry
others = np.sum(province_confirmed_cases[10:])
shown = len(province_confirmed_cases[:10])  # may be fewer than 10 when the input is short
visual_unique_provinces = [unique_provinces[idx] for idx in range(shown)]
visual_confirmed_cases2 = [province_confirmed_cases[idx] for idx in range(shown)]
visual_unique_provinces.append('Others')
visual_confirmed_cases2.append(others)

# Same data twice: raw counts first, then on a log10 scale
plot_bar_graphs(
    visual_unique_provinces,
    visual_confirmed_cases2,
    '# of Coronavirus Confirmed Cases in Provinces/States',
)
log_province_confirmed_cases = list(map(math.log10, visual_confirmed_cases2))
plot_bar_graphs(
    visual_unique_provinces,
    log_province_confirmed_cases,
    'Log of # of Coronavirus Confirmed Cases in Provinces/States',
)
def plot_pie_charts(x, y, title):
    """Show a single pie chart.

    x     -- legend entries, passed to plt.legend
    y     -- slice sizes; the same values are also used as the slice labels
    title -- text placed above the chart
    """
    # more muted color
    palette = [
        'lightcoral',
        'rosybrown',
        'sandybrown',
        'navajowhite',
        'gold',
        'khaki',
        'lightskyblue',
        'turquoise',
        'lightslategrey',
        'thistle',
        'pink',
    ]

    plt.figure(figsize=(20, 15))
    plt.title(title, size=20)
    plt.pie(y, labels=y, colors=palette, shadow=True)
    plt.legend(x, loc='best', fontsize=12)
    plt.show()
# Pie chart built from the country-level lists, then one built from the province/state-level lists
plot_pie_charts(visual_unique_countries, visual_confirmed_cases, 'Covid-19 Confirmed Cases per Country')
plot_pie_charts(visual_unique_provinces, visual_confirmed_cases2, 'Covid-19 Confirmed Cases per State/Province/Region')
# Plotting countries with regional data using a pie chart
def plot_pie_country_with_regions(country_name, title, top_n=5):
    """Draw a pie chart of confirmed cases per province/state of one country.

    Reads the module-level ``latest_data`` frame, totals ``Confirmed`` per
    ``Province_State`` over the rows whose ``Country_Region`` equals
    ``country_name``, drops regions without any confirmed case and passes the
    result to ``plot_pie_charts``. When more than ``top_n`` regions remain,
    only the ``top_n`` largest are shown individually and the remainder is
    summed into one 'Others' slice.

    Args:
        country_name: value matched against ``latest_data['Country_Region']``.
        title: chart title forwarded to ``plot_pie_charts``.
        top_n: number of regions shown individually before the rest is
            grouped as 'Others' (default 5).
    """
    # Restrict to the requested country BEFORE totalling. Province names are
    # not unique across countries, so matching on Province_State alone would
    # add another country's rows to this country's slice.
    country_rows = latest_data[latest_data['Country_Region'] == country_name]

    # One pass over the country's rows; groupby leaves out rows whose
    # Province_State is NaN. sort=False keeps first-appearance order so that
    # the stable sort below orders ties the same way a stable sorted() would.
    totals = country_rows.groupby('Province_State', sort=False)['Confirmed'].sum()

    # remove areas with no confirmed cases, largest region first
    totals = totals[totals > 0].sort_values(ascending=False, kind='mergesort')

    regions = totals.index.tolist()
    confirmed_cases = totals.tolist()

    # only show the top regions; additional province/state will be considered "others"
    if len(regions) > top_n:
        regions = regions[:top_n] + ['Others']
        confirmed_cases = confirmed_cases[:top_n] + [sum(confirmed_cases[top_n:])]

    plot_pie_charts(regions, confirmed_cases, title)
# One regional pie chart for each of the countries listed here
pie_chart_countries = [
    'Switzerland',
    'Canada',
    'Singapore',
    'US',
    'Australia',
    'Germany',
    'United Kingdom',
]
for country in pie_chart_countries:
    plot_pie_country_with_regions(country, f'Covid-19 Confirmed Cases in {country}')
# Replace nan with 0
# (inplace=True modifies us_medical_data itself rather than returning a filled copy)
us_medical_data.fillna(value=0, inplace=True)
def plot_us_medical_data(top_limit=30):
    """Plot the states with the most testing, by total tests and by testing rate.

    Reads the module-level ``us_medical_data`` frame, aggregates it per
    ``Province_State`` and draws two charts through ``plot_bar_graphs_tall``:
    the summed ``People_Tested`` and the ``Testing_Rate``, each limited to the
    ``top_limit`` highest states.

    Args:
        top_limit: how many states to include in each chart (default 30).
    """
    # sort=False keeps first-appearance order, so the stable sorts below
    # break ties in that order.
    per_state = us_medical_data.groupby('Province_State', sort=False)

    # Total number of people tested per state, largest first.
    tested = per_state['People_Tested'].sum().sort_values(ascending=False, kind='mergesort')

    # Testing rate per state, largest first. The value plotted is the same
    # max() the states are ranked by; summing a rate across rows would not
    # give a rate and could disagree with the displayed order.
    rate = per_state['Testing_Rate'].max().sort_values(ascending=False, kind='mergesort')

    # only show the top `top_limit` states
    plot_bar_graphs_tall(
        tested.index[:top_limit].tolist(),
        tested.iloc[:top_limit].tolist(),
        'Total Testing per State (Top {})'.format(top_limit),
    )
    plot_bar_graphs_tall(
        rate.index[:top_limit].tolist(),
        rate.iloc[:top_limit].tolist(),
        'Testing Rate per 100,000 People (Top {})'.format(top_limit),
    )
# Draw the US testing charts (total testing and testing rate per state)
plot_us_medical_data()